# Scrapy settings for TpyCar project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# Name of this Scrapy project / bot.
BOT_NAME = "TpyCar"

# Modules Scrapy searches for spiders.
SPIDER_MODULES = ["TpyCar.spiders"]
# Module in which newly generated spiders are created; kept in sync with the
# single spider module listed above.
NEWSPIDER_MODULE = SPIDER_MODULES[0]

# No add-ons are enabled.
ADDONS = {}

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = "TpyCar (+http://www.yourdomain.com)"

# Obey robots.txt rules
# This must be changed (to False); otherwise the crawl may not return any data.
ROBOTSTXT_OBEY = False

# Concurrency and throttling settings
# CONCURRENT_REQUESTS = 16
# At most one concurrent request per domain, with a base delay of 1 second
# between requests.
CONCURRENT_REQUESTS_PER_DOMAIN = 1
DOWNLOAD_DELAY = 1

# Cookie handling (enabled by default); explicitly left enabled here.
# NOTE(review): DEFAULT_REQUEST_HEADERS in this file also hard-codes a
# "Cookie" header. Scrapy's documentation states that cookies set via the
# Cookie header are not considered by CookiesMiddleware, so confirm which of
# the two mechanisms is actually intended.
COOKIES_ENABLED = True

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers.
# Sets the headers attached to requests by default: a browser-like
# User-Agent and a fixed cookie string (presumably copied from a browser
# session -- verify it is still valid).
# NOTE(review): COOKIES_ENABLED is True in this file, and Scrapy's
# documentation states that cookies set via the Cookie header are not
# considered by CookiesMiddleware -- confirm this entry reaches the server.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/138.0.0.0 Safari/537.36"
    ),
    # One cookie per group of literals; adjacent literals are concatenated
    # into a single "; "-separated header value.
    "Cookie": (
        "u=2onpxmdz; "
        "channel=8290; "
        "u4ad=2onbpawa; "
        "AplocationWap=%7B%22regionId%22%3A297%2C%22regionName%22%3A%22%E5%90%88%E8%82%A5%22%7D; "
        "pcsuv=1752192406909.a.825800129; "
        "pcuvdata=lastAccessTime=1752201344252|visits=4; "
        "ivy_look_number_686989_1823595=3; "
        "iwt_uuid=02e71e6f-862f-4b80-882a-30a2bdcc72ea; "
        "pcLocate=%7B"
        "%22proCode%22%3A%22340000%22%2C"
        "%22pro%22%3A%22%E5%AE%89%E5%BE%BD%22%2C"
        "%22proId%22%3A28%2C"
        "%22cityCode%22%3A%22340100%22%2C"
        "%22city%22%3A%22%E5%90%88%E8%82%A5%22%2C"
        "%22cityId%22%3A297%2C"
        "%22url%22%3A%22%2F%2Fwww.pcauto.com.cn%2Fqcbj%2Fhf%2F%22%2C"
        "%22dataType%22%3A%22baiduip%22%2C"
        "%22version%22%3A%223.1.2%22%2C"
        "%22msg%22%3A%22geo-4%2Cbaiduip-0%22%2C"
        "%22point%22%3A%7B%7D%2C"
        "%22expires%22%3A1753497346875%7D; "
        "ivy_look_number_104412_1825696=1; "
        "ivy_look_number_113869_1825316=3; "
        "ivy_look_number_695754_1824555=4; "
        "ivy_look_number_697653_1825588=4; "
        "ivy_look_number_695754_1822857=5"
    ),
}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#    "TpyCar.middlewares.TpycarSpiderMiddleware": 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# DOWNLOADER_MIDDLEWARES = {
#    "TpyCar.middlewares.TpycarDownloaderMiddleware": 543,
# }

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#    "scrapy.extensions.telnet.TelnetConsole": None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# Enable the item pipelines; entries with lower numbers run first.
ITEM_PIPELINES = {
    "TpyCar.pipelines.TpycarPipeline": 300,
    # Register the custom pipeline.
    "TpyCar.pipelines.MysqlPipeline": 350,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = "httpcache"
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"

# Set settings whose default value is deprecated to a future-proof value
# Feed exports are written using UTF-8 encoding.
FEED_EXPORT_ENCODING = "utf-8"
